* This file calculates the optimal polynomial order to be used to conduct diagnosis heterogeneity analysis in Appendix F. 

use "$saveddata/data_complete_withcosts.dta", clear

* diagnosis 
replace d1 = 40 if d1==.

* split variable
gen split = d1

* collapse for analysis
collapse (count) freq = aeattenddisp (mean) tcount icount admit discharge* ///
		inpat_los = spell_los inpat_proc = number_proc ///
		death30 death90 death365 othexit ///
		d_* costem30_all costae30_all cost30_aeem mort, by(bin split)
		
egen sumfreq = sum(freq)
gen pfreq = freq/sumfreq
drop sumfreq

* exclusions: remove post-600 minutes (not relevant to polynomial fit)
drop if bin>600

* bin polynomial variables
forv x = 1/15 {
	gen bin`x' = bin^`x'
	}

* split into 60 minute blocks
gen period = 1 if bin<=60
replace period = 2 if bin>60 & bin<=120
replace period = 3 if bin>120 & bin<=180
replace period = 4 if bin>240 & bin<=300
replace period = 5 if bin>300 & bin<=360
replace period = 6 if bin>360 & bin<=420
replace period = 7 if bin>420 & bin<=480
replace period = 8 if bin>480 & bin<=540
replace period = 9 if bin>540 

* parameters
local start = 180
local endoh = 260
qui tab split	
mat parameters = J(`r(r)',18,.)		// columns = 2 + number of outcomes

* find optimal polynomials and end point for wait times
qui tab split
forv split = 1/`r(r)' {

	* results matrix
	mat results`split' = J(26,13,.)
	
	preserve
	qui keep if split==`split'
	di "Starting split = `split'"
		
	* wait time parameters
	forv porder = 3/15 {
		forv endloop = 250(10)500 {

			* drop variables created in loop
			cap drop pfreq_cf diff
			
			* compute predictions
			cap qui reg pfreq bin1-bin`porder' if (bin<`start' | bin>`endloop') 
			
			* store results
			local endloopi = `endloop'/10 - 24
			local porderi = `porder' - 2

			if _rc==506 {
				mat results`split'[`endloopi',`porderi'] = .
				}
				
			else {
				qui predict pfreq_cf, xb

				* difference in mass
				qui gen diff = pfreq - pfreq_cf 
				qui su diff if bin>=`start' & bin<=`endloop'
				mat results`split'[`endloopi',`porderi'] = `r(sum)'
				}
			}
		}
		
	* other outcome parameters
	local outcomei = 3
	foreach var of varlist admit discharge othexit icount tcount inpat_los inpat_proc costem30_all costae30_all cost30_aeem death30 death90 death365  d_death d_admit mort {
		matrix results = J(9,13,.)
		forv porder = 3/15 {
			forv x = 1/9 {
				qui { 
					cap reg `var' bin1-bin`porder' if (bin<`start' | bin>`endoh') & period!=`x' 
					
					local i = `porder' - 2
					
					if _rc==506 {
						mat results[`x',`i'] = .
						}
					else {
						predict `var'hat if period==`x', xb
						gen error2 = (`var' - `var'hat)^2
						su error2
						mat results[`x',`i'] = r(sum)
						drop `var'hat error2
						}
					}
				}
			}
		
		mata : st_matrix("mse", colsum(st_matrix("results")))

		local min = 1
		local porder = 3
		forv x = 2/13 {
			if mse[1,`x']<mse[1,`min'] {
				local porder = `x' + 2
				local min = `x'
				}
			}

		mat parameters[`split',`outcomei'] = `porder'
		local outcomei = `outcomei' + 1
		}

	restore
	}
	
* create lookup with results
qui tab split
forv x = 1/`r(r)' {
	clear
	svmat results`x'
	foreach var of varlist results* {
		replace `var' = abs(`var')
		}
	egen rowmin = rowmin(results*)
	gen end = (_n + 24)*10
	sort rowmin

	local end = end[1]
	local pcount = 3
	foreach var of varlist results* {
		if `var'[1]==rowmin[1] {
			local poly = `pcount'
			}
		local pcount = `pcount' + 1
		}
	
	mat parameters[`x',1] = `end'
	mat parameters[`x',2] = `poly'
	drop rowmin end
	}

clear
svmat parameters
rename parameters1 end
rename parameters2 polystar_wait

local varcount = 3
foreach var in admit discharge othexit icount tcount inpat_los inpat_proc costem30_all costae30_all cost30_aeem death30 death90 death365 d_death d_admit  mort {
	rename parameters`varcount' polystar_`var'
	local varcount = `varcount' + 1
	}
	
gen d1 = _n
order d1 end polystar_wait polystar_*

save "$saveddata/lookup_polystar_disagg_diag.dta", replace



